# import requests
#
# import re
#
# url = 'https://www.yituyu.com/'
# response = requests.get(url)
# print(response.status_code)
# print(response.text)
#
#
# html_data = re.findall(r'<a href="https://www.yituyu.com/gallery/(\d+)/" target="_blank"><img src="https://img.yituyu.com/gallery/(\d+)/cover.jpg"  alt="(.*?)"></a>',response.text)
# print(html_data)


import requests
import re,os

# Scrape the landing page for gallery cover images and save each cover into
# a local folder, naming the file after the photographer text found next to it.

# Landing page that lists the galleries.
url = 'https://www.yituyu.com/'

# Headers sent with every request (page and images alike).
# NOTE(review): the Referer is presumably needed for the image host to serve
# the covers -- confirm against the site's behaviour.
headers = {
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
    'Referer': 'https://www.yituyu.com/',
    # A 'cookie' entry can be added here if a logged-in session is required;
    # never keep real session values in the source file.
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()

# Regex over one gallery entry: group 1 is the cover image URL, group 2 is
# the text of the "grapher" link that follows it. re.S lets `.` span newlines
# because the pieces sit on different lines of the HTML.
pattern = re.compile(
    r'<a href="https://www\.yituyu\.com/gallery/\d+/".*?'
    r'<img src="(https://img\.yituyu\.com/gallery/\d+/cover\.jpg)".*?'
    r'.*?<div class="chujingren">.*?<a href="https://www\.yituyu\.com/grapher/\d+/">\s*(.*?)\s*</a>',
    re.S
)

# Output directory; exist_ok avoids the check-then-create race.
folder_name = '艺图语'
os.makedirs(folder_name, exist_ok=True)

matches = pattern.findall(response.text)

# Number of times each sanitized name has been used in this run, so that
# several covers by the same author do not overwrite one another.
name_counts = {}

# Download every cover that was found.
for img_url, author in matches:
    # The author text is scraped HTML: replace characters that are path
    # separators or invalid in file names, and fall back when nothing is left.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', author).strip(' .') or 'unknown'
    name_counts[safe_name] = name_counts.get(safe_name, 0) + 1
    if name_counts[safe_name] > 1:
        safe_name = f'{safe_name}_{name_counts[safe_name]}'
    file_path = os.path.join(folder_name, f'{safe_name}.jpg')

    try:
        img_response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One failed image should not abort the remaining downloads, and an
        # error body must not be saved as if it were a JPEG.
        print(f'Skipping {img_url}: {exc}')
        continue

    with open(file_path, 'wb') as f:
        f.write(img_response.content)